Stock Price Forecast
Using Technical Analysis and Machine Learning
Part 1. Technical Analysis
This section provides a comprehensive technical analysis of stock prices using multiple indicators. For each stock, I calculate key metrics:
- Trend Indicators:
MA50/MA200: 50-day and 200-day Simple Moving Averages compare current price to medium/long-term trends (Price > MA = bullish +1, Price < MA = bearish -1)
EMA20: 20-day Exponential Moving Average gives more weight to recent prices for short-term trend direction
- Momentum Indicators:
RSI (14-day): Measures overbought (>70 = -1) vs. oversold (<30 = +1) conditions, with 30-70 being neutral
MACD: Signal line crossover indicator (MACD above its signal line = +1 bullish momentum, MACD below its signal line = -1 bearish momentum)
- Volatility & Volume Indicators:
Bollinger Bands: Price above upper band = overbought (-1), below lower band = oversold (+1), within bands = neutral (0)
MFI (Money Flow Index): Volume-weighted RSI (>80 = -1 overbought, <20 = +1 oversold, 20-80 = neutral)
Each indicator is scored (+1 for bullish, -1 for bearish, 0 for neutral), with a final aggregate signal determining the overall market outlook (Positive, Negative, or Neutral).
Code
# Load necessary libraries
library(tidyverse)
library(dplyr)
library(tidyquant)
library(TTR)
library(xgboost)
library(plotly)
library(gt)
library(gtExtras)
library(readr)
# 1. Load Data
# The CSV path is relative to the working directory (same directory
# structure assumed as before). Dates are coerced to Date and rows are
# ordered by ticker, then chronologically.
df <- read_csv("data/cleaned/stock_prices.csv") %>%
  mutate(date = as.Date(date)) %>%
  arrange(ticker, date)
Code
# Compute the technical indicators for a single ticker's price history.
#
# `data` must contain the columns high, low, close and volume.
# Returns `data` with these columns appended, in this order:
#   SMA_50, SMA_200, EMA_20, RSI_14, MFI_14,
#   then the columns produced by MACD() and by BBands().
calculate_indicators <- function(data) {
  # MFI() takes high/low/close together as one object, volume separately
  price_hlc <- data[, c("high", "low", "close")]
  closes <- data$close

  # Both of these return several columns, so they are built on their own
  # and bound onto the result afterwards
  macd_cols <- as_tibble(MACD(closes, 12, 26, 9, maType = "EMA"))
  band_cols <- as_tibble(BBands(closes, n = 20, sd = 2))

  with_single_cols <- mutate(
    data,
    SMA_50 = SMA(close, n = 50),
    SMA_200 = SMA(close, n = 200),
    EMA_20 = EMA(close, n = 20),
    RSI_14 = RSI(close, n = 14),
    MFI_14 = MFI(price_hlc, volume = data$volume, n = 14)
  )

  with_single_cols %>%
    bind_cols(macd_cols) %>%
    bind_cols(band_cols)
}
# Apply calculations to all tickers
# Only tickers with more than 200 rows are kept; the longest look-back
# requested by calculate_indicators() is the 200-period SMA.
per_ticker <- group_by(df, ticker)
with_history <- filter(per_ticker, n() > 200)
df_ta <- ungroup(
  group_modify(with_history, function(.x, .y) calculate_indicators(.x))
)
# Scoring Logic

# Trend-style score: +1 when `value` is above `reference`, -1 when below,
# 0 otherwise (ties, or a comparison that evaluates to NA).
score_above <- function(value, reference) {
  case_when(value > reference ~ 1, value < reference ~ -1, TRUE ~ 0)
}

# Contrarian score: +1 when `value` is under `lower`, -1 when over `upper`,
# 0 otherwise (inside the range, or a comparison that evaluates to NA).
score_reversal <- function(value, lower, upper) {
  case_when(value < lower ~ 1, value > upper ~ -1, TRUE ~ 0)
}

# Total_Score is the plain sum of the seven indicator scores; Signal is
# its sign expressed as a label.
df_ta <- mutate(
  df_ta,
  MA50_Score = score_above(close, SMA_50),
  MA200_Score = score_above(close, SMA_200),
  EMA_Score = score_above(close, EMA_20),
  MACD_Score = score_above(macd, signal),
  RSI_Score = score_reversal(RSI_14, 30, 70),
  BB_Score = score_reversal(close, dn, up),
  MFI_Score = score_reversal(MFI_14, 20, 80),
  Total_Score = MA50_Score + MA200_Score + EMA_Score + MACD_Score +
    RSI_Score + BB_Score + MFI_Score,
  Signal = case_when(
    Total_Score > 0 ~ "Positive",
    Total_Score < 0 ~ "Negative",
    TRUE ~ "Neutral"
  )
)
# Generate Summary Table
# Keeps, for every ticker, the row(s) carrying that ticker's latest date,
# restricted to the display columns followed by every *Score column and
# the final Signal. The result stays grouped by ticker.
display_cols <- c(
  "ticker", "close", "volume", "SMA_50", "SMA_200", "EMA_20",
  "macd", "RSI_14", "up", "dn", "MFI_14"
)
summary_df <- df_ta %>%
  group_by(ticker) %>%
  filter(date == max(date)) %>%
  select(all_of(display_cols), contains("Score"), Signal)
# Display styled table using gt
# Cell locations for the two highlighted Signal values are built first,
# then the table is assembled step by step and printed.
positive_cells <- cells_body(columns = Signal, rows = Signal == "Positive")
negative_cells <- cells_body(columns = Signal, rows = Signal == "Negative")

signal_table <- gt(summary_df)
signal_table <- tab_header(
  signal_table,
  title = "Technical Analysis Summary - Latest Signals"
)
signal_table <- fmt_number(
  signal_table,
  columns = c(close, SMA_50, SMA_200, EMA_20, up, dn),
  decimals = 2
)
signal_table <- fmt_number(signal_table, columns = volume, suffixing = TRUE)
signal_table <- gt_color_rows(
  signal_table,
  Total_Score,
  palette = c("red", "yellow", "green")
)
# Light green fill for Positive, light red fill for Negative
signal_table <- tab_style(
  signal_table,
  style = cell_fill(color = "#d4edda"),
  locations = positive_cells
)
signal_table <- tab_style(
  signal_table,
  style = cell_fill(color = "#f8d7da"),
  locations = negative_cells
)
signal_table
| Technical Analysis Summary - Latest Signals | ||||||||||||||||||
| close | volume | SMA_50 | SMA_200 | EMA_20 | macd | RSI_14 | up | dn | MFI_14 | MA50_Score | MA200_Score | EMA_Score | MACD_Score | RSI_Score | BB_Score | MFI_Score | Total_Score | Signal |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| AAPL | ||||||||||||||||||
| 248.68 | 20.37M | 269.78 | 234.74 | 259.27 | -2.36946399 | 23.54549 | 279.58 | 243.59 | 25.31303 | -1 | 1 | -1 | -1 | 1 | 0 | 0 | -1 | Negative |
| ADBE | ||||||||||||||||||
| 302.73 | 1.71M | 331.50 | 357.65 | 318.17 | -3.79778065 | 34.05754 | 369.17 | 281.19 | 32.94964 | -1 | -1 | -1 | -1 | 0 | 0 | 0 | -4 | Negative |
| AMZN | ||||||||||||||||||
| 239.68 | 17.27M | 232.37 | 220.30 | 236.04 | 0.59766662 | 55.47537 | 248.51 | 225.03 | 61.55384 | 1 | 1 | 1 | -1 | 0 | 0 | 0 | 2 | Positive |
| BAC | ||||||||||||||||||
| 51.58 | 14.33M | 53.90 | 48.30 | 53.79 | -1.23120016 | 34.56166 | 58.15 | 51.02 | 45.30107 | -1 | 1 | -1 | -1 | 0 | 0 | 0 | -2 | Negative |
| DIS | ||||||||||||||||||
| 111.56 | 4.05M | 109.76 | 110.60 | 112.44 | 0.39737151 | 48.28802 | 115.90 | 110.75 | 59.20544 | 1 | 1 | -1 | -1 | 0 | 0 | 0 | 0 | Neutral |
| GOOGL | ||||||||||||||||||
| 328.39 | 12.24M | 312.72 | 229.67 | 323.91 | 1.80127254 | 58.32054 | 339.59 | 306.99 | 58.85994 | 1 | 1 | 1 | -1 | 0 | 0 | 0 | 2 | Positive |
| HD | ||||||||||||||||||
| 383.07 | 996.84K | 356.24 | 370.44 | 368.76 | 2.17222348 | 65.62475 | 395.07 | 332.06 | 75.39303 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 4 | Positive |
| JNJ | ||||||||||||||||||
| 218.29 | 2.19M | 206.78 | 176.21 | 212.80 | 1.79453697 | 67.65978 | 222.64 | 199.76 | 60.93761 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 4 | Positive |
| JPM | ||||||||||||||||||
| 297.76 | 4.51M | 312.95 | 288.98 | 313.03 | -1.20877563 | 34.77893 | 340.64 | 296.11 | 45.10625 | -1 | 1 | -1 | -1 | 0 | 0 | 0 | -2 | Negative |
| KO | ||||||||||||||||||
| 72.26 | 5.75M | 70.56 | 69.11 | 70.70 | 0.70710718 | 62.89778 | 72.89 | 67.56 | 62.38649 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 4 | Positive |
| MA | ||||||||||||||||||
| 525.59 | 1.74M | 553.92 | 561.10 | 550.38 | -1.56460317 | 30.93978 | 598.33 | 519.88 | 35.03904 | -1 | -1 | -1 | -1 | 0 | 0 | 0 | -4 | Negative |
| META | ||||||||||||||||||
| 661.78 | 11.57M | 639.49 | 676.81 | 640.07 | -0.82320725 | 57.21962 | 683.17 | 605.76 | 56.27555 | 1 | -1 | 1 | 1 | 0 | 0 | 0 | 2 | Positive |
| MSFT | ||||||||||||||||||
| 469.30 | 19.93M | 480.94 | 482.82 | 468.51 | -1.67824555 | 48.27677 | 498.43 | 445.76 | 42.65325 | -1 | -1 | 1 | -1 | 0 | 0 | 0 | -2 | Negative |
| NFLX | ||||||||||||||||||
| 86.00 | 34.57M | 97.70 | 113.01 | 89.75 | -3.64960133 | 31.64944 | 96.02 | 84.00 | 20.03264 | -1 | -1 | -1 | -1 | 0 | 0 | 0 | -4 | Negative |
| NVDA | ||||||||||||||||||
| 187.94 | 80.23M | 183.83 | 165.69 | 184.93 | 0.08988118 | 54.29832 | 191.68 | 180.92 | 46.66409 | 1 | 1 | 1 | -1 | 0 | 0 | 0 | 2 | Positive |
| PG | ||||||||||||||||||
| 150.58 | 6.52M | 144.97 | 153.20 | 145.17 | 0.73180086 | 64.85683 | 150.08 | 138.06 | 66.20983 | 1 | -1 | 1 | 1 | 0 | -1 | 0 | 1 | Positive |
| PYPL | ||||||||||||||||||
| 56.91 | 4.05M | 60.13 | 67.19 | 57.83 | -2.06688642 | 42.09407 | 60.67 | 55.21 | 35.16651 | -1 | -1 | -1 | 1 | 0 | 0 | 0 | -2 | Negative |
| TSLA | ||||||||||||||||||
| 448.26 | 34.98M | 442.38 | 372.75 | 444.51 | -0.77553181 | 51.28174 | 475.57 | 416.33 | 55.61414 | 1 | 1 | 1 | -1 | 0 | 0 | 0 | 2 | Positive |
| UNH | ||||||||||||||||||
| 355.18 | 3.11M | 331.31 | 332.55 | 340.37 | 1.29486565 | 63.18486 | 355.43 | 323.15 | 58.84521 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 4 | Positive |
| V | ||||||||||||||||||
| 327.00 | 2.25M | 338.03 | 344.07 | 336.65 | -1.43186372 | 35.02471 | 367.72 | 316.84 | 40.38805 | -1 | -1 | -1 | -1 | 0 | 0 | 0 | -4 | Negative |
| WMT | ||||||||||||||||||
| 118.40 | 8.19M | 112.18 | 102.04 | 116.44 | 1.71530489 | 60.19195 | 122.17 | 108.92 | 27.87472 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 4 | Positive |
Part 2. Machine Learning
1. Overview
Building upon the technical analysis in Part 1, this section utilizes Machine Learning (XGBoost) to predict stock prices. Unlike traditional indicators that give simple Buy/Sell signals, the ML model analyzes the complex relationships between historical patterns (RSI, MACD, Moving Averages) to forecast the exact closing price of the next trading day.
2. Methodology
Algorithm: We use an XGBoost (Extreme Gradient Boosting) regression model, a robust algorithm highly effective for structured time-series data.
Feature Engineering: The model inputs include Open, High, Low, Close, Volume, and all technical indicators calculated in Part 1 (SMA, EMA, Bollinger Bands, etc.).
Training & Validation: To prevent “data leakage” (looking into the future), the data is split chronologically:
Training Set (First 80%): Used to teach the model historical patterns.
Test Set (Last 20%): Used to evaluate how well the model predicts unseen data.
3. Interactive Analysis Dashboard
The visualization below provides a comprehensive view of the model’s performance. Use the dropdown menu to select a specific ticker:
Top Chart (Actual vs. Predicted): Compares the real market price (Blue line) against the model’s prediction (Orange dotted line). The closer the lines, the better the model accuracy.
Bottom Chart (Feature Importance): Ranks which technical indicators were most influential in determining the price. For example, if RSI has a high bar, the model relies heavily on momentum to make predictions for that specific stock.
Note: The table below summarizes the prediction for the next upcoming trading day, including the predicted percentage change.
Code
# Prepare features
feature_cols <- c("open", "high", "low", "close", "volume",
                  "SMA_50", "SMA_200", "EMA_20", "RSI_14",
                  "macd", "signal", "up", "mavg", "dn", "pctB", "MFI_14")

# Booster settings shared by every per-ticker model
xgb_params <- list(objective = "reg:squarederror", eta = 0.05, max_depth = 5)

ml_results <- list()
ticker_preds <- list()

# Fit one model per ticker: the features of a given row are used to predict
# the close of the following row. Each iteration stores a one-row tibble
# (current price, next-row prediction, percentage change, test RMSE) in
# `ml_results`, keyed by ticker.
#
# NOTE(review): the target is the raw next-row price. Tree ensembles cannot
# predict outside the range of targets seen in training, so a ticker trading
# above its training-period prices may be systematically under-predicted.
# Consider modelling returns instead of price levels.
for (current_ticker in unique(df_ta$ticker)) {
  # all_of() fails immediately if a feature column is absent, instead of
  # silently skipping it and failing later when the matrix is built
  ticker_data <- df_ta %>%
    filter(ticker == current_ticker) %>%
    drop_na(all_of(feature_cols))

  # Create Target (Shifted Close)
  ticker_data$target <- lead(ticker_data$close)

  # The last row has no following close, so it is excluded from fitting and
  # evaluation; it is used further down as the input for the forecast
  train_df <- ticker_data %>% drop_na(target)

  # Split data in row order (first 80% train, last 20% test); this relies on
  # the rows of df_ta being date-ordered within each ticker
  n_obs <- nrow(train_df)
  split_idx <- floor(0.8 * n_obs)
  if (split_idx < 1) {
    # Fewer than two usable rows: `1:split_idx` would count backwards and put
    # the same row in both sets, so skip the ticker instead
    warning("Skipping ", current_ticker,
            ": not enough complete rows for a train/test split",
            call. = FALSE)
    next
  }
  train_set <- train_df[seq_len(split_idx), ]
  test_set <- train_df[seq(split_idx + 1, n_obs), ]

  # XGBoost Matrices
  dtrain <- xgb.DMatrix(data = as.matrix(train_set[, feature_cols]),
                        label = train_set$target)
  dtest <- xgb.DMatrix(data = as.matrix(test_set[, feature_cols]),
                       label = test_set$target)

  # Train Model
  # xgb.train() is the interface that takes an xgb.DMatrix directly
  model <- xgb.train(params = xgb_params, data = dtrain, nrounds = 100,
                     verbose = 0)

  # Predict on test set
  preds <- predict(model, dtest)
  rmse_val <- sqrt(mean((test_set$target - preds)^2))

  # Predict Next Day from the most recent complete feature row
  latest_data <- tail(ticker_data, 1)
  latest_matrix <- xgb.DMatrix(data = as.matrix(latest_data[, feature_cols]))
  next_pred <- predict(model, latest_matrix)

  ml_results[[current_ticker]] <- tibble(
    Ticker = current_ticker,
    Current_Price = latest_data$close,
    Predicted_Price = next_pred,
    Change_Pct = ((next_pred - latest_data$close) / latest_data$close) * 100,
    RMSE = rmse_val
  )
}
final_ml_results <- bind_rows(ml_results)
# Display ML Table
# Change_Pct already holds percentage points, so it is formatted without
# rescaling; the price and error columns are shown to two decimals.
ml_table <- gt(final_ml_results)
ml_table <- fmt_percent(ml_table, columns = Change_Pct, scale_values = FALSE)
ml_table <- fmt_number(
  ml_table,
  columns = c(Current_Price, Predicted_Price, RMSE),
  decimals = 2
)
ml_table
| Ticker | Current_Price | Predicted_Price | Change_Pct | RMSE |
|---|---|---|---|---|
| AAPL | 248.68 | 236.12 | −5.05% | 18.80 |
| ADBE | 302.73 | 306.16 | 1.13% | 8.27 |
| AMZN | 239.68 | 228.56 | −4.64% | 6.58 |
| BAC | 51.58 | 43.85 | −14.99% | 6.28 |
| DIS | 111.56 | 110.37 | −1.07% | 2.68 |
| GOOGL | 328.39 | 188.99 | −42.45% | 68.56 |
| HD | 383.07 | 379.56 | −0.92% | 10.66 |
| JNJ | 218.29 | 157.80 | −27.71% | 27.65 |
| JPM | 297.76 | 259.16 | −12.96% | 37.55 |
| KO | 72.26 | 67.56 | −6.51% | 2.29 |
| MA | 525.59 | 524.06 | −0.29% | 21.06 |
| META | 661.78 | 656.55 | −0.79% | 39.42 |
| MSFT | 469.30 | 424.53 | −9.54% | 64.67 |
| NFLX | 86.00 | 86.38 | 0.44% | 19.34 |
| NVDA | 187.94 | 127.50 | −32.16% | 43.80 |
| PG | 150.58 | 151.22 | 0.42% | 2.29 |
| PYPL | 56.91 | 56.10 | −1.43% | 1.57 |
| TSLA | 448.26 | 415.91 | −7.22% | 31.28 |
| UNH | 355.18 | 417.20 | 17.46% | 96.35 |
| V | 327.00 | 328.23 | 0.38% | 9.99 |
| WMT | 118.40 | 97.62 | −17.55% | 9.10 |
Code
# Interactive Plotly Chart for the first ticker as an example
# Draws the close column of the AAPL rows in df_ta as a line over date.
aapl_history <- filter(df_ta, ticker == "AAPL")
price_chart <- plot_ly(data = aapl_history)
price_chart <- add_lines(
  price_chart,
  x = ~date,
  y = ~close,
  name = "Actual Price"
)
layout(
  price_chart,
  title = "Stock Price Overview",
  xaxis = list(title = "Date"),
  yaxis = list(title = "Price")
)